package com.itany.crawler;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Handler for the site's index (home) page.
 *
 * <p>{@link #handle(Urls)} downloads the page, looks up every element carrying the
 * {@code c-category-list} class, and registers one {@link SecondHandle} per anchor
 * found inside those elements.
 */
public class IndexHandle extends UrlBaseHandle {
    /** Default index page address, used when the caller does not supply a URL. */
    String url_index = "http://www.bookschina.com/";

    /** Creates a handler that targets the default index page. */
    public IndexHandle() {
        setUrl(url_index);
    }

    /**
     * Creates a handler for an explicit URL.
     *
     * @param url     page address, forwarded to the superclass
     * @param proxyed proxy flag, forwarded to the superclass
     */
    public IndexHandle(String url, boolean proxyed) {
        super(url, proxyed);
    }

    /**
     * Creates a handler that targets the default index page.
     *
     * @param proxyed proxy flag, forwarded to the superclass
     */
    public IndexHandle(boolean proxyed) {
        super(proxyed);
        setUrl(url_index);
    }

    /**
     * Fetches the index page and enqueues a {@link SecondHandle} for every category link.
     *
     * <p>Each link is resolved against this handler's URL, so relative, root-relative and
     * already-absolute {@code href} values all yield a well-formed absolute address. Links
     * that cannot be resolved to an absolute URL (for example an empty {@code href}) are
     * skipped.
     *
     * @param urls collection that receives the handlers for the discovered links
     * @return a fixed status message
     * @throws Exception if fetching or parsing the page fails
     */
    @Override
    public String handle(Urls urls) throws Exception {
        // Fetch the page's HTML source.
        String html = doGet();

        // The second argument of Jsoup.parse(String, String) is the base URI used to
        // resolve relative links, not a charset name; pass the page's own address.
        String baseUri = getUrl() == null ? "" : getUrl();
        Document document = Jsoup.parse(html, baseUri);

        // Select the category containers by CSS class, then walk their anchors.
        for (Element category : document.getElementsByClass("c-category-list")) {
            for (Element link : category.getElementsByTag("a")) {
                System.out.println(link.attr("href") + "==================" + link.text());

                // Let jsoup resolve the href against the base URI instead of blindly
                // concatenating strings, which corrupts absolute and "/"-prefixed links.
                String absUrl = link.absUrl("href");
                if (absUrl.isEmpty()) {
                    continue;
                }
                urls.add(new SecondHandle(absUrl, this.isProxyed()));
            }
        }

        return "主页处理";
    }
}
